In [1]:
## Import necessary libraries and load the LinkedIn connections export
from pathlib import Path

import pandas as pd

# Build the path with pathlib instead of a backslash-separated string literal:
# a "\C" inside a normal string is an invalid escape sequence (a warning on
# recent Python versions) and a backslash separator only resolves on Windows.
CONNECTIONS_CSV = Path("Data") / "Connections.csv"

data = pd.read_csv(CONNECTIONS_CSV)
data.head(10)
Out[1]:
First Name Last Name URL Email Address Company Position Connected On
0 Wells Velasquez Maciel https://www.linkedin.com/in/wells-velasquez-ma... NaN SiteMinder Customer Solutions Consultant Level 3 11 Sep 2024
1 Patrick Canney https://www.linkedin.com/in/paddyblack-exe NaN SOTI Senior Technical Support Specialist 11 Sep 2024
2 Borja Martínez Ariza https://www.linkedin.com/in/borja-mart%C3%ADne... NaN Abbott Customer Service - Order Management Specialist 30 Apr 2024
3 Krutik Pednekar https://www.linkedin.com/in/krutikpednekar NaN Trane Technologies Standards Lead 24 Apr 2024
4 Nathan Woodward https://www.linkedin.com/in/nathan-woodward-42... NaN Avondata Systems Ltd Client Success Manager 23 Apr 2024
5 RIGAN NGANGOM https://www.linkedin.com/in/rigan-ngangom-17b0... NaN EDoors Inc Senior Team Lead 21 Feb 2024
6 Chiara Celidoni https://www.linkedin.com/in/chiaracelidoni NaN SiteMinder Strategic Account Director 21 Dec 2023
7 Daire O'Neill https://www.linkedin.com/in/daire-o-neill-3584762 NaN Irish Life Investment Managers Manager Reporting: Performance Measurement Team 21 Dec 2023
8 Dustin Aldridge https://www.linkedin.com/in/dustin-aldridge-98... NaN SiteMinder Premium Services Manager 01 Dec 2023
9 Rita Guembes, MCI https://www.linkedin.com/in/ritaguembesc NaN SiteMinder Customer Onboarding Success Specialist 30 Nov 2023
In [2]:
## Order the rows by connection date, then chart the connections per date
import plotly.express as px

# NOTE: 'Connected On' has not been parsed into datetimes at this point, so
# the ordering below is applied to the values exactly as they were loaded.
data = data.sort_values(by='Connected On')

# One row per distinct 'Connected On' value; after count() the 'First Name'
# column holds how many non-null first names were recorded for that value.
connections_per_date = data.groupby(by='Connected On').count().reset_index()

px.line(
    connections_per_date,
    x='Connected On',
    y='First Name',
    labels={'First Name': 'No. of Connections'},
    title='Connection Timeline',
)
In [3]:
## Parse the connection dates and re-create the timeline
# Convert the 'DD Mon YYYY' strings (e.g. '11 Sep 2024') into real datetimes so
# that sorting and the x-axis are chronological rather than alphabetical.
data['Connected On'] = pd.to_datetime(data['Connected On'], format='%d %b %Y')

data = data.sort_values(by='Connected On')

# size() counts every row for a date. count() on 'First Name' would silently
# leave out any connection whose first name is missing.
grouped_data = (
    data.groupby(by='Connected On')
    .size()
    .reset_index(name='No. of Connections')
)

## Create the line plot
fig = px.line(
    grouped_data,
    x='Connected On',
    y='No. of Connections',
    title='Connection Timeline',
)
fig.show()
In [4]:
# Tag every connection with its calendar month (a monthly Period, shown as YYYY-MM)
data['YearMonth'] = data['Connected On'].dt.to_period('M')

# Number of connections made in each month
monthly_data = (
    data.groupby('YearMonth')
    .size()
    .reset_index(name='ConnectionCount')
)

# Rank the months by connection count, largest first, and keep the first ten
ranked_months = monthly_data.sort_values(by='ConnectionCount', ascending=False)
top_months = ranked_months.head(10)

# Show the ten busiest months
print(top_months)
   YearMonth  ConnectionCount
39   2020-04               46
2    2017-02               39
23   2018-12               33
32   2019-09               27
6    2017-06               16
4    2017-04               15
52   2021-05               13
18   2018-07               13
5    2017-05               12
36   2020-01               11
In [5]:
## Inspect the company listed for each connection
data.loc[:, 'Company']
Out[5]:
496      SAP SuccessFactors
495                  adidas
490               Qualtrics
493         Employment Hero
494                   Cisco
               ...         
4      Avondata Systems Ltd
3        Trane Technologies
2                    Abbott
1                      SOTI
0                SiteMinder
Name: Company, Length: 497, dtype: object
In [6]:
## Per company, count the non-null entries of every other column
company_groups = data.groupby(by='Company')
group_company = company_groups.count().reset_index()
group_company
Out[6]:
Company First Name Last Name URL Email Address Position Connected On YearMonth
0 7AJ WORLD ENTERTAINMENT 1 1 1 0 1 1 1
1 7th Heaven 1 1 1 0 1 1 1
2 AIB 1 1 1 0 1 1 1
3 AIHR | Academy to Innovate HR 1 1 1 0 1 1 1
4 AKOFENA RENTALS LIMITED 1 1 1 0 1 1 1
... ... ... ... ... ... ... ... ...
290 owl.co 1 1 1 0 1 1 1
291 permanent tsb 1 1 1 0 1 1 1
292 truelink Consulting GmbH 1 1 1 0 1 1 1
293 www.cestandard.com 1 1 1 0 1 1 1
294 𝙃𝙖𝙧𝙢𝙤𝙣𝙞𝙘 𝘿𝙞𝙜𝙞𝙩𝙖𝙡 𝙈𝙖𝙧𝙠𝙚𝙩𝙞𝙣𝙜 1 1 1 0 1 1 1

295 rows × 8 columns

In [7]:
## Rank the companies by their number of connections, largest first
group_company = (
    group_company
    .sort_values(by='Connected On', ascending=False)
    .reset_index(drop=True)
)
group_company
Out[7]:
Company First Name Last Name URL Email Address Position Connected On YearMonth
0 SiteMinder 100 100 100 3 100 100 100
1 SAP 38 38 38 0 38 38 38
2 Genesys 5 5 5 0 5 5 5
3 Salesforce 4 4 4 0 4 4 4
4 NUI Galway 4 4 4 0 4 4 4
... ... ... ... ... ... ... ... ...
290 GuestCentric Systems 1 1 1 0 1 1 1
291 HERO Recruitment Ltd. 1 1 1 0 1 1 1
292 HID 1 1 1 0 1 1 1
293 Hayes solicitors LLP 1 1 1 0 1 1 1
294 𝙃𝙖𝙧𝙢𝙤𝙣𝙞𝙘 𝘿𝙞𝙜𝙞𝙩𝙖𝙡 𝙈𝙖𝙧𝙠𝙚𝙩𝙞𝙣𝙜 1 1 1 0 1 1 1

295 rows × 8 columns

In [8]:
## Bar chart of the first 150 rows of group_company
first_150_companies = group_company[:150]

# Keyword arguments for the chart, gathered in one place for readability
bar_options = {
    'x': 'Company',
    'y': 'Connected On',
    'labels': {'Connected On': 'Number of Connections'},
    'width': 1000,
    'height': 800,
    'title': 'Bar graph for companies that my connections work at',
}

fig = px.bar(first_150_companies, **bar_options)
fig.show()
In [9]:
## Creating a treemap visualization with fewer companies
# Same cut-off as before: the first 100 companies of group_company.
top_companies = group_company['Company'][:100]

# group_company only holds per-company counts, so its 'Position' column is a
# number rather than a job title. Rebuild the Company/Position pairs from the
# individual connections so the second treemap level shows real positions.
company_positions = (
    data[data['Company'].isin(top_companies)]
    # Keep connections without a position so company totals stay complete.
    .assign(Position=lambda df: df['Position'].fillna('Not specified'))
    .groupby(['Company', 'Position'])
    .size()
    .reset_index(name='Connections')
)

fig = px.treemap(
    company_positions,
    path=['Company', 'Position'],
    values='Connections',
    labels={'Connections': 'Number of Connections'},
    width=1000,
    height=800,
    # The previous title was copied from the bar chart and said "Bar graph".
    title='Treemap of the companies and positions of my connections',
)
fig.show()
C:\ProgramData\Anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

C:\ProgramData\Anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

In [10]:
## Tally how many connections hold each position title
position_titles = data['Position']
position_titles.value_counts()
Out[10]:
Director                                 5
Manager                                  5
Project Manager                          3
Business Development Manager             3
Account Executive                        3
                                        ..
Partner Integrations Team Leader         1
Developer                                1
L&D Trainer                              1
Software Tester                          1
Customer Solutions Consultant Level 3    1
Name: Position, Length: 442, dtype: int64
In [11]:
## Flag the positions that account for more than 0.20% of all connections
# The counts are scaled to percentages (x100) before the comparison, so the
# threshold below is 0.20 percent of the connections, not 20 percent.
MIN_SHARE_PERCENT = 0.20

position_share_percent = data['Position'].value_counts() / len(data) * 100

# Boolean mask indexed by position title: True where the share exceeds the threshold
position_data = position_share_percent > MIN_SHARE_PERCENT
In [12]:
## Get the counts of the positions whose share exceeds 0.20% of all connections
# Compute the tally once and reuse it for both the mask and the selection,
# instead of calling value_counts() twice in a single expression.
position_counts = data['Position'].value_counts()
share_percent = position_counts / len(data) * 100

position_counts[share_percent > 0.20]
Out[12]:
Director                                 5
Manager                                  5
Project Manager                          3
Business Development Manager             3
Account Executive                        3
                                        ..
Partner Integrations Team Leader         1
Developer                                1
L&D Trainer                              1
Software Tester                          1
Customer Solutions Consultant Level 3    1
Name: Position, Length: 442, dtype: int64
In [13]:
# Count the connections per position, most common position first
position_counts_table = (
    data.groupby('Position')
    .size()
    .reset_index(name='Count')
    .sort_values(by='Count', ascending=False)
)

# Create the bar chart
fig = px.bar(
    position_counts_table,
    x='Position',
    y='Count',
    # labels maps a column name to its display text. The previous mapping was
    # keyed on 'Number of Connections', which is not a column, so it did nothing.
    labels={'Count': 'Number of Connections'},
    width=1000,
    height=900,
    title='The various positions held by my LinkedIn connections',
)

# Show the plot
fig.show()
In [14]:
pip install wordcloud matplotlib
Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.

Requirement already satisfied: wordcloud in c:\programdata\anaconda3\lib\site-packages (1.8.2.2)
Requirement already satisfied: matplotlib in c:\programdata\anaconda3\lib\site-packages (3.5.1)
Requirement already satisfied: numpy>=1.6.1 in c:\programdata\anaconda3\lib\site-packages (from wordcloud) (1.21.5)
[notice] A new release of pip is available: 23.0.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip
Requirement already satisfied: pillow in c:\programdata\anaconda3\lib\site-packages (from wordcloud) (9.0.1)
Requirement already satisfied: fonttools>=4.22.0 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (4.25.0)
Requirement already satisfied: packaging>=20.0 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (21.3)
Requirement already satisfied: python-dateutil>=2.7 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (2.8.2)
Requirement already satisfied: cycler>=0.10 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: pyparsing>=2.2.1 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (3.0.4)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\programdata\anaconda3\lib\site-packages (from matplotlib) (1.3.2)
Requirement already satisfied: six>=1.5 in c:\programdata\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)
In [15]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt

def CreateWordCloud(text):
    """Render a word cloud for ``text`` and return the matplotlib figure.

    Parameters
    ----------
    text : str
        The text passed to ``WordCloud.generate`` to build the cloud from.

    Returns
    -------
    matplotlib.figure.Figure
        The 15x10 inch figure the word cloud image was drawn on. The figure
        is shown via ``plt.show()`` before it is returned.
    """
    wordcloud = WordCloud(
        width=1000,
        height=900,
        background_color='black',
        min_font_size=10,
        colormap='Set2',
    ).generate(text)

    # Explicit figure/axes pair instead of the implicit pyplot state machine
    fig, ax = plt.subplots(figsize=(15, 10))
    ax.imshow(wordcloud, interpolation='bilinear')
    # Pixel-coordinate ticks carry no meaning for a word cloud image
    ax.axis('off')

    plt.show()
    return fig
In [16]:
## Join every non-missing position title into one space-separated string
positions_text = ' '.join(data['Position'].dropna().astype(str))

# CreateWordCloud already displays the figure through plt.show(). Binding the
# returned figure to a name keeps it from also becoming the cell's output
# value, which would render the same image a second time.
wordcloud_fig = CreateWordCloud(positions_text)
Out[16]: